home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
EnigmA Amiga Run 1996 February
/
EnigmA AMIGA RUN 04 (1996)(G.R. Edizioni)(IT)[!][issue 1996-02][Skylink CD III].iso
/
earcd
/
gnu
/
recode33.lha
/
recode-3.3
/
recode.c
< prev
next >
Wrap
C/C++ Source or Header
|
1993-12-22
|
63KB
|
2,379 lines
/* Conversion of files between different charsets and usages.
Copyright (C) 1990, 1992, 1993 Free Software Foundation, Inc.
Francois Pinard <pinard@iro.umontreal.ca>, 1990.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include "recode.h"
/* Maximum number of single step methods. */
#define MAX_SINGLE_STEPS 300
/* Maximum length of a conversion sequence. */
#define MAX_SEQUENCE 12
/* In the `BEFORE:AFTER' parameter, there is a default supplied whenever
`:AFTER' or `BEFORE:' are used. */
#ifndef DEFAULT_CHARSET
#ifdef MSDOS
#define DEFAULT_CHARSET "ibmpc"
#else
#ifdef atarist
#define DEFAULT_CODE "atarist"
#else
#define DEFAULT_CHARSET "latin1"
#endif
#endif
#endif
/* Global declarations and definitions. */
#include <ctype.h>
#include <sys/types.h>
#include <sys/stat.h>
#ifdef HAVE_STRING_H
#include <string.h>
#else
#include <strings.h>
#define strchr index
#define strrchr rindex
#endif
#ifdef MSDOS
#include <dir.h>
#define unlink dummy1
#include <io.h>
#undef unlink
#include <fcntl.h>
#endif
#include <errno.h>
#ifndef errno
extern int errno;
#endif
#include "getopt.h"
/* tmpnam/tmpname/mktemp/tmpfile and the associate logic has been the
main portability headache of GNU recode :-(.
People reported that tmpname does not exist everywhere. On OS/2,
recode aborts if the prefix has more than five characters.
tmpnam seems to exist everywhere so far. But NeXT's tmpnam() is such
that, if called many times in succession, it will always return the
same value. One has to really open a file with the returned name
first, for the next call to tmpnam() to return a different value. I
can manage it for a single invocation of recode, but using two recode
invocations connected with a shell pipe, on the NeXT, creates a race
by which both copies may call tmpnam() in parallel, then getting the
same value, and will consequently open the same temporary file.
Noah Friedman <friedman@gnu.ai.mit.edu> suggests opening the file with
O_EXCL, and when the open presumably fails, call tmpnam again, or try
the mktemp routine in the GNU C library...maybe that will work better.
Michael I Bushnell <mib@gnu.ai.mit.edu> suggests always using tmpfile,
which opens the file too, using the O_EXCL option to open.
I'm trying this last suggestion, rewinding instead of closing.
Someone reported, a long while ago, that rewind did not work on his
system, so I reverted to opening and closing the temporary files all
the time. I lost the precise references for this problem. In any
case, I'm reusing rewind with tmpfile, now. Hopefully, someone will
tell me if this creates a problem somewhere! */
/* The previous round used tmpnam(3). This one tries tmpfile(3). */
/* #define USE_TMPNAM 1 */
#define USE_TMPFILE 1
#ifdef USE_TMPNAM
/* Guarantee some value for L_tmpnam. */
#ifdef MSDOS
#define L_tmpnam 13
#else /* not MSDOS */
char *tmpnam ();
#ifndef L_tmpnam
#include "pathmax.h"
#define L_tmpnam PATH_MAX
#endif
#endif /* not MSDOS */
#endif /* USE_TMPNAM */
#ifdef USE_TMPFILE
FILE *tmpfile _((void));
#endif /* USE_TMPFILE */
/* Variables. */
/* Program identification. */
const char *const version_string = "GNU recode version 3.3";
const char *const copyright_string = "\
This program is free software; you can redistribute it and/or modify\n\
it under the terms of the GNU General Public License as published by\n\
the Free Software Foundation; either version 2, or (at your option)\n\
any later version.\n\
\n\
This program is distributed in the hope that it will be useful,\n\
but WITHOUT ANY WARRANTY; without even the implied warranty of\n\
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n\
GNU General Public License for more details.\n\
\n\
You should have received a copy of the GNU General Public License\n\
along with this program; if not, write to the Free Software\n\
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.\n";
/* The name this program was run with. */
const char *program_name;
/* If non-zero, display usage information and exit. */
static int show_help = 0;
/* If non-zero, print the version on standard output and exit. */
static int show_version = 0;
/* If non-zero, show a list of one or all known charsets, then exit. */
static int show_charsets = 0;
/* Indicates the format for showing only one charset. */
enum list_format list_format = NO_FORMAT;
/* If non-zero, merely explore all recoding paths, report and exit. */
static int auto_check_mode = 0;
/* If non-zero, produce C code for initializing the conversion and exit. */
static int make_header_mode = 0;
/* Table name in generated C code. */
static const char *header_name = NULL;
/* If the recoding yields some problems in reversability, the replacement is
normally not completed and the file is left unrecoded. The following
option forces the replacement even if the case the recoding is not
reversible. But if recode is used as a mere filter, there is no file
replacement and this option is then irrelevant. */
int force_option = 0;
/* This option prevents recode from automatically completing charsets. */
int strict_mapping = 0;
/* By selecting the following option, the program will echo to stderr the
sequence of elementary recoding steps which will be taken to effect
the requested recoding. */
int verbose_option = 0;
/* When a file is recoded over itself, precautions are taken to move the
timestamps of the original file into the recoded file, so to make the
recoding the most transparent possible to make, and other tools.
However, selecting the following option inhibit the timestamps handling,
thus effectively `touching' the file. */
int touch_option = 0;
/* In `texte' charset, some countries use double quotes to mark diaeresis,
while other countries prefer colons. The following variable contains the
diaeresis character for `texte' charset. Nominally set to a double
quote, it can be forced to a colon by an option on recode command. */
char diaeresis_char = '"';
/* For `latex' charset, it is often convenient to recode the diacritics
only, while letting other LaTeX code using backslashes unrecoded.
In the other charset, one can edit text as well as LaTeX directives. */
int diacritics_only = 0;
/* For `ibmpc' charset, characters 176 to 223 are use to draw boxes.
If this variable is set, while getting out of `ibmpc', ASCII
characters are selected so to approximate these boxes. */
int ascii_graphics = 0;
/* The following charset name will be ignored, if given. */
static const char *ignored_name = NULL;
/* Unabridged names of BEFORE and AFTER charsets, even if still aliases.
These are used for naming the array in produced C code. */
static const char *before_full_name;
static const char *after_full_name;
/* Ordinals of list, BEFORE and AFTER charset. */
static CHARSET *list_charset;
static CHARSET *before_charset;
static CHARSET *after_charset;
/* Flag telling usage that we are decoding charsets. */
int decoding_charset_flag = 0;
/* Tells how various passes will be interconnected. */
enum sequence_strategy
{
STRATEGY_UNDECIDED, /* sequencing strategy is undecided yet */
SEQUENCE_WITH_FILES, /* do not fork, use intermediate files */
SEQUENCE_WITH_POPEN, /* use `popen(3)' to fork processes */
SEQUENCE_WITH_PIPE /* fork processes connected with `pipe(2)' */
};
enum sequence_strategy sequence_strategy = STRATEGY_UNDECIDED;
/* Known single steps. */
STEP single_step_array[MAX_SINGLE_STEPS];
int number_of_single_steps; /* number of announced single steps */
const unsigned char *one_to_same; /* identity recoding */
CHARSET *rfc1345; /* special RFC 1345 charset value */
/* Array stating the sequence of conversions. */
const STEP *sequence[MAX_SEQUENCE];
int length_of_sequence;
/* Quality handling. */
/*---------------------------------------.
| Return a string describing a quality. |
`---------------------------------------*/
const char *
quality_to_string (QUALITY quality)
{
switch (quality)
{
default:
abort ();
case REVERSIBLE:
return "reversible";
case ONE_TO_ONE:
return "one to one";
case MANY_TO_ONE:
return "many to one";
case ONE_TO_MANY:
return "one to many";
case MANY_TO_MANY:
return "many to many";
}
}
/*-------------------------------------------------------------------------.
| Return the quality of a step obtained by merging two others steps, given |
| their respective qualities FIRST and SECOND. |
`-------------------------------------------------------------------------*/
QUALITY
merge_qualities (QUALITY first, QUALITY second)
{
switch (first)
{
default:
abort ();
case REVERSIBLE:
return second;
case ONE_TO_ONE:
switch (second)
{
case REVERSIBLE:
case ONE_TO_ONE:
return ONE_TO_ONE;
case MANY_TO_ONE:
case ONE_TO_MANY:
case MANY_TO_MANY:
return second;
}
case MANY_TO_ONE:
switch (second)
{
case REVERSIBLE:
case ONE_TO_ONE:
case MANY_TO_ONE:
return MANY_TO_ONE;
case ONE_TO_MANY:
case MANY_TO_MANY:
return MANY_TO_MANY;
}
case ONE_TO_MANY:
switch (second)
{
case REVERSIBLE:
case ONE_TO_ONE:
case ONE_TO_MANY:
return ONE_TO_MANY;
case MANY_TO_ONE:
case MANY_TO_MANY:
return MANY_TO_MANY;
}
case MANY_TO_MANY:
return MANY_TO_MANY;
}
}
/* Charset handling. */
/*----------------------------------------------------.
| Decode the BEFORE:AFTER argument, given in STRING. |
`----------------------------------------------------*/
void
decode_before_after (const char *string)
{
char *before;
char *after;
char *in;
char *out;
/* Split the BEFORE:AFTER keyword at the colon. A backslash can escape
a colon in both charsets. */
before = xstrdup (string);
after = NULL;
out = before;
for (in = before; *in; in++)
if (*in == ':' && !after)
{
*out++ = '\0';
after = out;
}
else
{
if (*in == '\\' && *(in + 1))
in++;
*out++ = *in;
}
*out = '\0';
if (!after)
usage (EXIT_FAILURE);
/* Decode both charsets. */
before_full_name = clean_charset_name (before);
before_charset = find_charset (before_full_name);
after_full_name = clean_charset_name (after);
after_charset = find_charset (after_full_name);
/* Free the work area. */
free (before);
}
/* Single step handling. */
/*-----------------------------------------------------------------------.
| Allocate and initialize a new single step, save for the before and after |
| charsets and quality. |
`-----------------------------------------------------------------------*/
static STEP *
new_single_step (void)
{
STEP *step;
if (number_of_single_steps == MAX_SINGLE_STEPS)
error (EXIT_FAILURE, 0, "MAX_SINGLE_STEPS is too small");
step = single_step_array + number_of_single_steps++;
step->init_recode = NULL;
step->file_recode = NULL;
step->one_to_one = NULL;
step->one_to_many = NULL;
return step;
}
/*------------------------------------------------------------------------.
| Create and initialize a new single step for recoding between START_NAME |
| and GOAL_NAME, which are given as strings, give it a recoding QUALITY, |
| also saving an INIT_RECODE and a FILE_RECODE functions. |
`------------------------------------------------------------------------*/
void
declare_step (const char *before_name, const char *after_name, QUALITY quality,
void (*init_recode) (STEP *),
void (*file_recode) (const STEP *, FILE *, FILE *))
{
STEP *step;
step = new_single_step ();
step->before = find_charset (before_name);
step->after = find_charset (after_name);
step->quality = quality;
step->init_recode = init_recode;
step->file_recode = file_recode;
}
/*------------------------------------------------------------------.
| Create a one to one table which is the inverse of the given one. |
`------------------------------------------------------------------*/
unsigned char *
invert_table (const unsigned char *table)
{
unsigned char flag[256];
unsigned char *result;
int table_error;
int counter;
result = (unsigned char *) xmalloc (256);
memset (flag, 0, 256);
table_error = 0;
for (counter = 0; counter < 256; counter++)
{
if (flag[table[counter]])
{
error (0, 0, "Codes %3d and %3d both recode to %3d",
result[table[counter]], counter, table[counter]);
table_error = 1;
}
else
{
result[table[counter]] = counter;
flag[table[counter]] = 1;
}
}
if (table_error)
{
for (counter = 0; counter < 256; counter++)
if (!flag[counter])
error (0, 0, "No character recodes to %3d", counter);
error (EXIT_FAILURE, 0, "Cannot invert given one-to-one table");
}
return result;
}
/*-------------------------------------------------------------------------.
| Complete a STEP descriptor by a constructed recoding array for 256 chars |
| and the adequate recoding routine. If FIRST_HALF_IMPLIED is not zero, |
| default the unconstrained characters of the first 128 to the identity |
| mapping. Use an KNOWN_PAIRS array of NUMBER_OF_PAIRS constraints. If |
| REVERSE is not zero, use right_table instead of left_table. |
`-------------------------------------------------------------------------*/
void
complete_pairs (STEP *step, int first_half_implied,
const KNOWN_PAIR *known_pairs, int number_of_pairs,
int reverse)
{
unsigned char left_flag[256];
unsigned char right_flag[256];
unsigned char left_table[256];
unsigned char right_table[256];
int table_error;
unsigned char *flag;
unsigned char *table;
const char **table2;
char *cursor;
unsigned char left;
unsigned char right;
unsigned char search;
int counter;
int used;
/* Init tables with zeroes. */
memset (left_flag, 0, 256);
memset (right_flag, 0, 256);
table_error = 0;
/* Establish known data. */
for (counter = 0; counter < number_of_pairs; counter++)
{
left = known_pairs[counter].left;
right = known_pairs[counter].right;
/* Set one known correspondance. */
if (left_flag[left])
{
if (!table_error)
{
error (0, 0, "Following diagnostics for `%s' to `%s'",
step->before->name, step->after->name);
table_error = 1;
}
error (0, 0, "Pair no. %d: { %3d, %3d } conflicts with { %3d, %3d }",
counter, left, right, left, left_table[left]);
}
else if (right_flag[right])
{
if (!table_error)
{
error (0, 0, "Following diagnostics for `%s' to `%s'",
step->before->name, step->after->name);
table_error = 1;
}
error (0, 0, "Pair no. %d: { %3d, %3d } conflicts with { %3d, %3d }",
counter, left, right, right_table[right], right);
}
else
{
left_flag[left] = 1;
left_table[left] = right;
right_flag[right] = 1;
right_table[right] = left;
}
}
/* Set all the implied correspondances. */
if (first_half_implied)
for (counter = 0; counter < 128; counter++)
if (!left_flag[counter] && !right_flag[counter])
{
left_flag[counter] = 1;
left_table[counter] = counter;
right_flag[counter] = 1;
right_table[counter] = counter;
}
if (strict_mapping)
{
/* If the recoding is strict, prepare a one to many table, each
entry being NULL or a string of a single character. */
/* Select the proper table. */
if (reverse)
{
flag = right_flag;
table = right_table;
}
else
{
flag = left_flag;
table = left_table;
}
/* Allocate everything in one blow, so it will be freed likewise. */
used = 0;
for (counter = 0; counter < 256; counter++)
if (flag[counter])
used++;
table2 = (const char **) xmalloc (256 * sizeof (char *) + 2 * used);
cursor = (char *) (table2 + 256);
/* Construct the table and the strings in parallel. */
for (counter = 0; counter < 256; counter++)
if (flag[counter])
{
table2[counter] = cursor;
*cursor++ = table[counter];
*cursor++ = '\0';
}
else
table2[counter] = NULL;
/* Save a one to many recoding table. */
step->file_recode = file_one_to_many;
step->one_to_many = table2;
}
else
{
/* If the recoding is not strict, compute a reversible one to one
table. */
if (table_error)
error (EXIT_FAILURE, 0,
"Cannot complete table from set of known pairs");
/* Close the table with small permutation cycles. */
for (counter = 0; counter < 256; counter++)
if (!right_flag[counter])
{
search = counter;
while (left_flag[search])
search = left_table[search];
left_flag[search] = 1;
left_table[search] = counter;
right_flag[counter] = 1;
right_table[counter] = search;
}
/* Save a copy of the proper table. */
step->file_recode = file_one_to_one;
table = (unsigned char *) xmalloc (256);
memcpy (table, reverse ? right_table : left_table, 256);
step->one_to_one = table;
}
}
/*----------------------------------------.
| Initialize all collected single steps. |
`----------------------------------------*/
void
register_all_modules (void)
{
STEP *step;
int counter;
unsigned char *table;
table = (unsigned char *) xmalloc (256);
for (counter = 0; counter < 256; counter++)
table[counter] = counter;
one_to_same = table;
prepare_charset_initialization ();
number_of_single_steps = 0;
rfc1345 = find_charset ("RFC 1345");
declare_alias (".", "RFC 1345");
declare_alias ("latin1", "ISO_8859-1:1987");
declare_alias ("lat1", "latin1");
#include "initstep.h"
for (step = single_step_array;
step < single_step_array + number_of_single_steps;
step++)
if (step->file_recode == file_one_to_one
&& step->one_to_one == one_to_same)
step->conversion_cost = 0;
else
switch (step->quality)
{
case REVERSIBLE:
step->conversion_cost = 2;
break;
case ONE_TO_ONE:
step->conversion_cost = 30;
break;
case MANY_TO_ONE:
step->conversion_cost = 10;
break;
case ONE_TO_MANY:
step->conversion_cost = 40;
break;
case MANY_TO_MANY:
step->conversion_cost = 50;
break;
}
/* For all RFC 1345 participating steps, halve the cost since they
come in pair. */
for (counter = 0; counter < number_of_single_steps; counter++)
if (single_step_array[counter].before == rfc1345
|| single_step_array[counter].after == rfc1345)
single_step_array[counter].conversion_cost /= 2;
}
/*-------------------------------------------------------------------------.
| Produce a C include file representing the recoding, on standard output. |
`-------------------------------------------------------------------------*/
static void
output_header_file (void)
{
const STEP *step; /* step being analysed */
int column; /* column counter */
char *name; /* constructed name */
char *cursor; /* cursor in constructed name */
const char *cursor2; /* cursor to study strings */
int counter; /* general purpose counter */
/* This function is called only when the recoding sequence contains a
single step, so it is safe to use sequence[0] for the step. */
step = sequence[0];
/* Print the header of the header file. */
printf ("/* Conversion table from `%s' charset to `%s' charset.\n",
before_full_name, after_full_name);
printf (" Generated mechanically by %s.\n", version_string);
printf ("\n");
switch (sequence[0]->quality)
{
case REVERSIBLE:
printf (" The recoding should be reversible.\n");
break;
case ONE_TO_ONE:
printf (" The recoding might not be reversible.\n");
break;
case MANY_TO_ONE:
printf (" Programming is needed to handle multichar input.\n");
break;
case ONE_TO_MANY:
printf (" Each input char transforms into an output string.\n");
break;
case MANY_TO_MANY:
printf (" Each input char transforms into an output string,\n");
printf (" programming is needed to handle multichar input.\n");
break;
}
printf ("*/\n");
printf ("\n");
/* Construct the name of the resulting table. */
if (header_name)
name = xstrdup (header_name);
else
{
name = (char *) xmalloc (strlen (before_full_name) + sizeof "_to_"
+ strlen (after_full_name));
strcpy (name, before_full_name);
strcat (name, "_to_");
strcat (name, after_full_name);
}
/* Ensure the table name contains only valid characters for a C
identifier. */
for (cursor = name; *cursor; cursor++)
if (*cursor != '_'
&& (*cursor < 'a' || *cursor > 'z')
&& (*cursor < 'A' || *cursor > 'Z')
&& (*cursor < '0' || *cursor > '9'))
*cursor = '_';
/* Produce the recoding table in the correct format. */
if (step->one_to_one)
{
/* Produce a one to one recoding table. */
printf ("unsigned char const %s[256] =\n", name);
printf (" {\n");
for (counter = 0; counter < 256; counter++)
{
printf ("%s%3d,", counter % 8 == 0 ? " " : " ",
step->one_to_one[counter]);
if (counter % 8 == 7)
printf (" /* %3d - %3d */\n", counter - 7, counter);
}
printf (" };\n");
}
else if (step->one_to_many)
{
/* Produce a one to many recoding table. */
printf ("const char *%s[256] =\n", name);
printf (" {\n");
for (counter = 0; counter < 256; counter++)
{
printf (" ");
column = 4;
if (step->one_to_many[counter])
{
printf ("\"");
column++;
for (cursor2 = step->one_to_many[counter]; *cursor2; cursor2++)
switch (*cursor2)
{
case ' ':
printf (" ");
column++;
break;
case '\b':
printf ("\\b");
column += 2;
break;
case '\t':
printf ("\\t");
column += 2;
break;
case '\n':
printf ("\\n");
column += 2;
break;
case '"':
printf ("\\\"");
column += 2;
break;
case '\\':
printf ("\\\\");
column += 2;
break;
default:
if (isprint (*cursor2))
{
printf ("%c", *cursor2);
column++;
}
else
{
printf ("\\%0.3o", *(const unsigned char *) cursor2);
column += 4;
}
}
printf ("\"");
column++;
}
else
{
printf ("0");
column++;
}
printf (", ");
column += 2;
while (column < 32)
{
printf (" ");
column++;
}
printf ("/* %3d */\n", counter);
}
printf (" };\n");
}
else
error (EXIT_FAILURE, 0, "No table to print");
free (name);
}
/* Double step handling (for RFC 1345). */
/*-----------------------------------------------------------------------.
| Associate a double TABLE with charset NAME, part of the RFC 1345 fully |
| connected set. Each entry in table uses SIZE characters. |
`-----------------------------------------------------------------------*/
void
declare_double_step (DOUBLE_TABLE *table, const char *name, int size)
{
CHARSET *charset;
STEP *step;
charset = find_charset (name);
charset->table = table;
charset->size = size;
step = new_single_step ();
step->before = charset;
step->after = rfc1345;
step->quality = strict_mapping ? ONE_TO_MANY : REVERSIBLE;
step->init_recode = NULL;
step->file_recode = NULL;
step = new_single_step ();
step->before = rfc1345;
step->after = charset;
step->quality = strict_mapping ? ONE_TO_MANY : REVERSIBLE;
step->init_recode = NULL;
step->file_recode = NULL;
}
void
init_recode_rfc1345 (STEP *step)
{
char pool[2 * 256 * 6]; /* character pool */
struct sort
{
const char *key; /* ISO 10646 name, blank padded to size */
int code; /* corresponding charset code (0..255) */
};
struct side
{
CHARSET *charset; /* charset */
struct sort binding[256]; /* bindings */
int bindings; /* number of bindings */
};
DOUBLE_TABLE *table; /* RFC 1345 table */
struct side side_array[2]; /* information for each side */
struct side *side; /* cursor into side_array */
int reversed; /* if both sides reversed */
KNOWN_PAIR pair[256]; /* obtained pairings */
int pairs; /* number of pairings */
const char *in; /* cursor in double table strings */
char *out; /* cursor in character pool */
int code; /* character code */
int row_counter; /* double table row counter */
int position_counter; /* double table column counter */
int counter; /* counter for characters */
int left; /* left bindings counter */
int right; /* right bindings counter */
/* For ensuring reversibility, known pairs should be computed the same
way regardless of the direction of recoding. This canonalization is
ensured through the charset values, which are increasing along the
initialization order. This should also reflect the charset order in
rfc1345.txt. */
if (step->before < step->after)
{
side_array[0].charset = step->before;
side_array[1].charset = step->after;
reversed = 0;
}
else
{
side_array[0].charset = step->after;
side_array[1].charset = step->before;
reversed = 1;
}
out = pool;
for (side = side_array; side < side_array + 2; side++)
{
/* Move out the values for sorting out of the double table. */
side->bindings = 0;
code = 0;
table = side->charset->table;
for (row_counter = 0; row_counter < 8; row_counter++)
if (in = (*table)[row_counter], in)
for (position_counter = 0; position_counter < 32; position_counter++)
{
if (*in == ' ')
in += side->charset->size;
else
{
/* Establish a new binding. */
side->binding[side->bindings].code = code;
side->binding[side->bindings].key = out;
side->bindings++;
/* Copy out the value to the character pool, and terminate it
with a NULL. */
for (counter = 0; counter < side->charset->size; counter++)
if (*in == ' ')
in++;
else
*out++ = *in++;
*out++ = '\0';
}
code++;
}
else
code += 32;
}
/* This crude, quadratic pairing code will do for now. */
pairs = 0;
for (left = 0; left < side_array[0].bindings; left++)
{
for (right = 0; right < side_array[1].bindings; right++)
if (strcmp (side_array[0].binding[left].key,
side_array[1].binding[right].key)
== 0)
{
pair[pairs].left = side_array[0].binding[left].code;
pair[pairs].right = side_array[1].binding[right].code;
pairs++;
break;
}
}
/* Complete the recoding table out of this. */
complete_pairs (step, 0, pair, pairs, reversed);
}
/* Step sequence handling. */
#define UNREACHABLE 30000 /* No way for this conversion */
/*-------------------------------------------------------.
| Explain what recoding step sequence has been planned. |
`-------------------------------------------------------*/
static void
echo_sequence (void)
{
const char *last; /* last name printed */
const char *name; /* name being printed */
QUALITY quality; /* cumulative quality */
int counter; /* index into sequence */
if (length_of_sequence < 0)
error (0, 0, "UNACHIEVABLE recoding!");
else if (length_of_sequence == 0)
error (0, 0, "Mere copy, no recoding");
else
{
quality = REVERSIBLE;
last = NULL;
for (counter = 0; counter < length_of_sequence; counter++)
{
name = sequence[counter]->before->name;
if (counter == 0)
fprintf (stderr, "%s", name);
else if (name != last)
fprintf (stderr, "/%s", name);
name = sequence[counter]->after->name;
fprintf (stderr, " -> %s", name);
quality = merge_qualities (quality, sequence[counter]->quality);
last = name;
}
fprintf (stderr, " (%s)\n", quality_to_string (quality));
}
}
/*----------------------------------------------------------.
| Find a sequence of single steps to achieve a conversion. |
`----------------------------------------------------------*/
static void
find_sequence (CHARSET *before, CHARSET *after)
{
struct search
{
STEP *step; /* step who will bring us nearer to after */
int cost; /* cost from here through after */
};
struct search *search_array; /* critical path search tree */
struct search *search; /* item in search_array for charset */
STEP *step; /* cursor in possible single_steps */
int cost; /* cost under consideration */
int modified; /* != 0 if modified since last iteration */
CHARSET *charset; /* charset while reconstructing sequence */
search_array
= (struct search *) xmalloc (number_of_charsets * sizeof (struct search));
/* Initialize the search for an economical route, looking our way
backward from the after towards the before. */
for (search = search_array;
search < search_array + number_of_charsets;
search++)
{
search->step = NULL;
search->cost = UNREACHABLE;
}
search_array[after - charset_array].cost = 0;
modified = 1;
while (modified)
{
modified = 0;
for (step = single_step_array;
step < single_step_array + number_of_single_steps;
step++)
if (!step->before->ignore)
{
cost = search_array[step->after - charset_array].cost;
if (cost != UNREACHABLE)
{
cost += step->conversion_cost;
search = search_array + (step->before - charset_array);
if (cost < search->cost)
{
search->step = step;
search->cost = cost;
modified = 1;
}
}
}
}
if (search_array[before - charset_array].cost == UNREACHABLE)
{
/* If no path has been found, return with a negative length. */
length_of_sequence = -1;
}
else
{
/* Save the retained best path in the sequence array. While doing so,
optimize out any single step which merely copies. Also execute the
delayed initialization for those steps which registered one. */
length_of_sequence = 0;
for (charset = before; charset != after; charset = step->after)
{
step = search_array[charset - charset_array].step;
if (step->file_recode != file_one_to_one
|| step->one_to_one != one_to_same)
{
if (length_of_sequence == MAX_SEQUENCE)
error (EXIT_FAILURE, 0, "MAX_SEQUENCE is too small");
sequence[length_of_sequence++] = step;
if (step->init_recode)
{
(*step->init_recode) (step);
step->init_recode = NULL;
}
}
}
}
/* Tell what has been decided, for the user. */
if (verbose_option)
echo_sequence ();
free (search_array);
}
/*---------------------------------------------------------------------.
| Optimize a sequence of single steps by creating new single steps, if |
| this can be done by merging adjacent steps which are simple enough. |
`---------------------------------------------------------------------*/
static void
optimize_sequence (void)
{
int saved_steps; /* number of saved steps */
unsigned char *accum; /* one_to_one accumulated recoding */
unsigned char temp[256]; /* temporary value for accum array */
const char **string; /* one_to_many recoding */
STEP *step; /* new single step being constructed */
int in; /* ordinal of next studied sequence step */
int out; /* ordinal of next output sequence step */
int counter; /* all purpose counter */
saved_steps = 0;
/* See if there are some RFC 1345 double steps to merge. */
in = 0;
out = 0;
while (in < length_of_sequence)
if (sequence[in]->after == rfc1345
&& in < length_of_sequence - 1
&& sequence[in+1]->before == rfc1345)
{
/* Produce a new single step for the double step. */
step = new_single_step ();
step->before = sequence[in]->before;
step->after = sequence[in+1]->after;
step->quality = merge_qualities (sequence[in]->quality,
sequence[in+1]->quality);
step->init_recode = init_recode_rfc1345;
step->file_recode = file_one_to_one;
in += 2;
saved_steps++;
/* Initialize the new single step. If not, it will not be
mergeable with others. */
(*step->init_recode) (step);
step->init_recode = NULL;
sequence[out++] = step;
}
else if (sequence[in]->before == rfc1345
|| sequence[in]->after == rfc1345)
error (EXIT_FAILURE, 0, "You may not ask for RFC 1345 explicitely");
else
sequence[out++] = sequence[in++];
length_of_sequence = out;
/* Recopy the sequence array over itself, while merging subsequences of
one or more consecutive one-to-one recodings, including an optional
final one-to-many recoding. */
in = 0;
out = 0;
while (in < length_of_sequence)
if (sequence[in]->one_to_one
&& (sequence[in]->file_recode == file_one_to_one || make_header_mode)
&& in < length_of_sequence - 1
&& (((sequence[in+1]->one_to_one)
&& (sequence[in+1]->file_recode == file_one_to_one
|| make_header_mode))
|| (sequence[in+1]->one_to_many
&& (sequence[in+1]->file_recode == file_one_to_many
|| make_header_mode))))
{
/* Construct a new single step, and initialize a cumulative
one-to-one recoding with the identity permutation. */
accum = (unsigned char *) xmalloc (256);
for (counter = 0; counter < 256; counter++)
accum[counter] = counter;
step = new_single_step ();
step->before = sequence[in]->before;
step->quality = REVERSIBLE;
/* Merge in all consecutive one-to-one recodings. */
while (in < length_of_sequence
&& sequence[in]->one_to_one
&& (sequence[in]->file_recode == file_one_to_one
|| make_header_mode))
{
for (counter = 0; counter < 256; counter++)
temp[counter] = sequence[in]->one_to_one[accum[counter]];
for (counter = 0; counter < 256; counter++)
accum[counter] = temp[counter];
step->after = sequence[in]->after;
step->quality
= merge_qualities (step->quality, sequence[in]->quality);
in++;
saved_steps++;
}
/* Check for a possible one-to-many recoding. */
if (in < length_of_sequence
&& sequence[in]->one_to_many
&& (sequence[in]->file_recode == file_one_to_many
|| make_header_mode))
{
/* Merge in the one-to-many recoding, and make the new single
step be a one-to-many recoding. */
string = (const char **) xmalloc (256 * sizeof (char *));
for (counter = 0; counter < 256; counter++)
string[counter] = sequence[in]->one_to_many[accum[counter]];
free (accum);
step->one_to_many = string;
step->file_recode = file_one_to_many;
step->after = sequence[in]->after;
step->quality
= merge_qualities (step->quality, sequence[in]->quality);
in++;
saved_steps++;
}
else
{
/* Make the new single step be a one-to-one recoding. */
step->one_to_one = accum;
step->file_recode = file_one_to_one;
}
/* Save the newly created step. */
sequence[out++] = step;
}
else
{
/* This step is not being optimized. Keep it verbatim. */
sequence[out++] = sequence[in++];
}
/* Save the resulting sequence length, and tell the user if something
changed. */
length_of_sequence = out;
if (saved_steps > 0 && verbose_option)
echo_sequence ();
}
/* Recoding execution control. */
/*--------------.
| Copy a file. |
`--------------*/
static void
file_copy (FILE *input_file, FILE *output_file)
{
int input_char; /* current character */
while (input_char = getc (input_file), input_char != EOF)
putc (input_char, output_file);
}
/*--------------------------------------------------.
| Recode a file using a one-to-one recoding table. |
`--------------------------------------------------*/
void
file_one_to_one (const STEP *step, FILE *input_file, FILE *output_file)
{
const unsigned char *table; /* conversion table */
int input_char; /* current character */
table = step->one_to_one;
if (table == one_to_same)
/* Optimize a little an identity recoding. */
file_copy (input_file, output_file);
else
/* Copy the file through the one to one recoding table. */
while (input_char = getc (input_file), input_char != EOF)
putc (table[input_char], output_file);
}
/*---------------------------------------------------.
| Recode a file using a one-to-many recoding table. |
`---------------------------------------------------*/
void
file_one_to_many (const STEP *step, FILE *input_file, FILE *output_file)
{
const char *const *table; /* conversion table */
int input_char; /* current character */
const char *output_string; /* translated characters */
/* Copy the file through the one to many recoding table. */
table = step->one_to_many;
while (input_char = getc (input_file), input_char != EOF)
if (output_string = table[input_char], output_string)
while (*output_string)
{
putc (*output_string, output_file);
output_string++;
}
}
/*-------------------------------------------------------------------.
| Execute the conversion sequence, using several passes with two |
| alternating intermediate files. This routine assumes at least one |
| needed recoding step. |
`-------------------------------------------------------------------*/
static void
execute_pass_sequence (const char *input_name, const char *output_name)
{
int sequence_index; /* index into sequence */
const STEP *step; /* pointer to step */
FILE *input_file; /* input file to recoding step */
FILE *output_file; /* output file from recoding step */
#ifdef USE_TMPNAM
char *temp_input_name; /* step input file name */
char *temp_output_name; /* step output file name */
char temp_name_1[L_tmpnam]; /* one temporary file name */
char temp_name_2[L_tmpnam]; /* another temporary file name */
char *exchange_temp; /* for exchanging temporary names */
#endif
#ifdef USE_TMPNAM
/* Choose names for intermediate files. Use "" for delaying them. */
#ifdef MSDOS
strcpy (temp_name_1, "recodex1.tmp");
strcpy (temp_name_2, "recodex2.tmp");
#else
*temp_name_1 = '\0';
*temp_name_2 = '\0';
#endif
temp_input_name = temp_name_1;
temp_output_name = temp_name_2;
#endif /* USE_TMPNAM */
/* Execute one pass for each step of the sequence. */
for (sequence_index = 0;
sequence_index < length_of_sequence;
sequence_index++)
{
/* Select the input file for this step. */
if (sequence_index == 0)
if (input_name)
{
input_file = fopen (input_name, "r");
if (input_file == NULL)
error (EXIT_FAILURE, errno, input_name);
}
else
input_file = stdin;
else
{
#ifdef USE_TMPNAM
input_file = fopen (temp_input_name, "r");
if (input_file == NULL)
error (EXIT_FAILURE, errno, temp_input_name);
#endif
#ifdef USE_TMPFILE
rewind (input_file);
#endif
}
/* Select the output file for this step. */
if (sequence_index == length_of_sequence - 1)
if (output_name)
{
output_file = fopen (output_name, "w");
if (output_file == NULL)
error (EXIT_FAILURE, errno, output_name);
}
else
output_file = stdout;
else
{
#ifdef USE_TMPNAM
#ifndef MSDOS
if (*temp_output_name == '\0')
tmpnam (temp_output_name);
#endif
output_file = fopen (temp_output_name, "w");
if (output_file == NULL)
error (EXIT_FAILURE, errno, temp_output_name);
#endif
#ifdef USE_TMPFILE
output_file = tmpfile ();
if (output_file == NULL)
error (EXIT_FAILURE, errno, "tmpfile()");
#endif
}
/* Execute one recoding step. */
step = sequence[sequence_index];
(*step->file_recode) (step, input_file, output_file);
/* Close the input file, unlink it if it was temporary. */
if (sequence_index == 0)
{
if (input_name)
fclose (input_file);
}
else
{
fclose (input_file);
#ifdef USE_TMPNAM
unlink (temp_input_name);
#endif
}
/* Close the output file, exchange names for subsequent step. */
if (sequence_index == length_of_sequence - 1)
{
if (output_name)
fclose (output_file);
}
else
{
#ifdef USE_TMPNAM
fclose (output_file);
exchange_temp = temp_input_name;
temp_input_name = temp_output_name;
temp_output_name = exchange_temp;
#endif
#ifdef USE_TMPFILE
input_file = output_file;
#endif
}
}
}
/*-------------------------------------------------------------------------.
| Execute the conversion sequence, using a chain of invocations of the |
| program through popen. This routine assumes that more than one recoding |
| step is needed. |
`-------------------------------------------------------------------------*/
#ifdef HAVE_POPEN
static void
execute_popen_sequence (const char *input_name, const char *output_name)
{
const STEP *step; /* current step */
FILE *input_file; /* input file to recoding step */
FILE *output_file; /* output file from recoding step */
char popen_command[80]; /* to receive command string */
int status; /* status to be asserted */
/* Construct a `recode' command for all recoding steps but the first. */
sprintf (popen_command, "%s -o %s %s:%s %s%s",
program_name,
diaeresis_char == ':' ? " -c" : "",
clean_charset_name (sequence[1]->before->name),
clean_charset_name (sequence[length_of_sequence-1]->after->name),
output_name ? "> " : "",
output_name ? output_name : "");
/* Execute the first recoding step. */
if (!input_name)
input_file = stdin;
else if (input_file = fopen (input_name, "r"), input_file == NULL)
error (EXIT_FAILURE, errno, input_name);
if (output_file = popen (popen_command, "w"), output_file == NULL)
error (EXIT_FAILURE, errno, popen_command);
step = sequence[0];
(*step->file_recode) (step, input_file, output_file);
if (input_name)
fclose (input_file);
status = pclose (output_file);
if (status != 0)
error (EXIT_FAILURE, errno, popen_command);
}
#endif /* HAVE_POPEN */
/*-------------------------------------------------------------------------.
| Execute the conversion sequence, forking the program many times for all |
| elementary steps, interconnecting them with pipes. This routine assumes |
| at least one recoding step is needed. |
`-------------------------------------------------------------------------*/
#ifndef HAVE_DUP2
#undef HAVE_PIPE
#endif
#ifdef HAVE_PIPE
static void
execute_pipe_sequence (const char *input_name, const char *output_name)
{
int sequence_index; /* index into sequence */
const STEP *step; /* pointer into single_steps */
FILE *input_file; /* input file to recoding step */
FILE *output_file; /* output file from recoding step */
int pipe_pair[2]; /* pair of file descriptors for a pipe */
int child_process; /* child process number, zero if child */
int status; /* status to be asserted */
/* Prepare the final output file. */
if (output_name)
{
output_file = fopen (output_name, "w");
if (output_file != NULL)
error (EXIT_FAILURE, errno, output_name);
}
else
output_file = stdout;
/* Create all subprocesses and interconnect them. */
for (sequence_index = length_of_sequence - 1;
sequence_index > 0;
sequence_index--)
{
status = pipe (pipe_pair);
if (status != 0)
error (EXIT_FAILURE, errno, "Creating pipe");
child_process = fork ();
if (child_process < 0)
error (EXIT_FAILURE, errno, "Forking process");
if (child_process == 0)
{
/* The child executes its recoding step, reading from the pipe
and writing to the current output file; then it exits. */
status = close (pipe_pair[1]);
if (status != 0)
error (EXIT_FAILURE, errno, "Closing pipe output side");
input_file = fdopen (pipe_pair[0], "r");
if (input_file == NULL)
error (EXIT_FAILURE, errno, "Streaming pipe input side");
step = sequence[sequence_index];
(*step->file_recode) (step, input_file, output_file);
fclose (input_file);
if (sequence_index < length_of_sequence - 1 || output_name)
fclose (output_file);
exit (EXIT_SUCCESS);
}
else
{
/* The parent redirects the current output file to the pipe. */
status = dup2 (pipe_pair[1], fileno (output_file));
if (status == -1)
error (EXIT_FAILURE, errno, "Duplicating pipe output");
status = close (pipe_pair[0]);
if (status != 0)
error (EXIT_FAILURE, error, "Closing pipe input side");
status = close (pipe_pair[1]);
if (status != 0)
error (EXIT_FAILURE, error, "Closing pipe output side");
}
}
/* All the children are created, blocked on read. Now, feed the whole
chain of processes with the output of the first recoding step. */
if (!input_name)
input_file = stdin;
else
{
input_file = fopen (input_name, "r");
if (input_file == NULL)
error (EXIT_FAILURE, errno, input_name);
}
(*sequence[0]->routine) (input_file, output_file);
if (input_name)
fclose (input_file);
if (output_name)
fclose (output_file);
/* Wait on all children, mainly to avoid synchronisation problems on
output file contents, but also to reduce the number of zombie
processes in case the user recodes many files at once. */
while (wait (NULL) > 0)
;
}
#endif /* HAVE_PIPE */
/*-----------------------------------------------------------------------.
| Execute the conversion sequence, using the selected strategy whenever |
| more than one conversion step is needed. If no conversion are needed, |
| merely copy the input onto the output. |
`-----------------------------------------------------------------------*/
/* If some sequencing strategies are missing, this routine automatically
uses fallback strategies. */
static void
execute_sequence (const char *input_name, const char *output_name)
{
FILE *input_file; /* input file to recoding step */
FILE *output_file; /* output file from recoding step */
const STEP *step; /* current step */
#ifdef MSDOS
if (!input_name)
setmode (fileno (stdin), O_BINARY);
if (!output_name)
setmode (fileno (stdout), O_BINARY);
_fmode = O_BINARY;
#endif
if (verbose_option && input_name)
{
fprintf (stderr, "Recoding %s...", input_name);
fflush (stderr);
}
if (length_of_sequence > 1)
switch (sequence_strategy)
{
case STRATEGY_UNDECIDED:
error (EXIT_FAILURE, 0, "Internal error - strategy undecided");
case SEQUENCE_WITH_PIPE:
#ifdef HAVE_PIPE
execute_pipe_sequence (input_name, output_name);
break;
#endif
case SEQUENCE_WITH_POPEN:
#ifdef HAVE_POPEN
execute_popen_sequence (input_name, output_name);
break;
#endif
case SEQUENCE_WITH_FILES:
execute_pass_sequence (input_name, output_name);
break;
}
else
{
/* This is a single-step recoding or a mere copy. Do it. */
if (!input_name)
input_file = stdin;
else if (input_file = fopen (input_name, "r"), input_file == NULL)
error (EXIT_FAILURE, errno, input_name);
if (!output_name)
output_file = stdout;
else if (output_file = fopen (output_name, "w"), output_file == NULL)
error (EXIT_FAILURE, errno, output_name);
if (length_of_sequence == 1)
{
step = sequence[0];
(*step->file_recode) (step, input_file, output_file);
}
else
file_copy (input_file, output_file);
if (input_name)
fclose (input_file);
if (output_name)
fclose (output_file);
}
if (verbose_option && input_name)
{
fprintf (stderr, " done\n");
fflush (stderr);
}
}
/* Some special option handling. */
/*-----------------------------------------------------------------.
| Print a truncated charset name, with a guaranteed space at end. |
`-----------------------------------------------------------------*/
static void
print_truncated_charset_name (const char *name)
{
char copy[15];
if ((int) strlen (name) > 14)
{
memcpy (copy, name, 14);
copy[14] = '\0';
name = copy;
}
printf ("%-14s ", name);
}
/*----------------------------------------------------.
| Find all possible sequences and report about them. |
`----------------------------------------------------*/
static void
report_about_all_sequences (void)
{
CHARSET *before;
CHARSET *after;
int saved_length_of_sequence;
int saved_number_of_single_steps;
QUALITY quality;
const char *quality_string;
int counter;
STEP *step;
for (before = charset_array;
before < charset_array + number_of_charsets;
before++)
if (!before->ignore && before != rfc1345)
for (after = charset_array;
after < charset_array + number_of_charsets;
after++)
if (!after->ignore && after != before && after != rfc1345)
{
/* Study what we can do. */
find_sequence (before, after);
if (length_of_sequence < 0)
{
if (!ignored_name)
{
print_truncated_charset_name (before->name);
print_truncated_charset_name (after->name);
printf (" UNACHIEVABLE\n");
}
}
else
{
/* Compute the recoding quality. */
quality = REVERSIBLE;
for (counter = 0; counter < length_of_sequence; counter++)
quality = merge_qualities (quality,
sequence[counter]->quality);
quality_string = quality_to_string (quality);
/* Study what optimization can do. */
saved_length_of_sequence = length_of_sequence;
saved_number_of_single_steps = number_of_single_steps;
optimize_sequence ();
/* Check and report codes which should be aliases. */
if (length_of_sequence == 1 && sequence[0]->one_to_one)
{
for (counter = 0; counter < 256; counter++)
if (sequence[0]->one_to_one[counter] != counter)
break;
if (counter == 256)
quality_string = "ONE to SAME";
}
/* Make the report. */
print_truncated_charset_name (before->name);
print_truncated_charset_name (after->name);
printf (" %-16s", quality_string);
printf ("steps: %d", saved_length_of_sequence);
if (length_of_sequence != saved_length_of_sequence)
printf (", %d saved by merging",
saved_length_of_sequence - length_of_sequence);
printf ("\n");
/* Unregister and clean up the merged steps. */
while (number_of_single_steps > saved_number_of_single_steps)
{
number_of_single_steps--;
step = single_step_array + number_of_single_steps;
if (step->one_to_one)
free ((void *) step->one_to_one);
if (step->one_to_many)
free ((void *) step->one_to_many);
}
}
}
}
/* Main program. */
/*-----------------------------------------------.
| Explain how to use the program, then get out. |
`-----------------------------------------------*/
void
usage (int status)
{
if (status != EXIT_SUCCESS)
fprintf (stderr, "Try `%s %s' for more information.\n", program_name,
decoding_charset_flag ? "--list" : "--help");
else
{
printf ("\
Usage: %s [OPTION]... [CHARSET]\n", program_name);
printf ("\
-C, --copyright display Copyright and copying conditions\n\
-l, --list[=FORMAT] list one or all known charsets\n\
--help display this help\n\
--version output version information\n\
\n\
FORMAT it is one of decimal, octal, hexadecimal or full, it defaults to\n\
decimal if CHARSET given. CHARSET defaults to %s if FORMAT given.\n",
DEFAULT_CHARSET);
printf ("\
Option -l with no FORMAT nor CHARSET list all charsets, also see the texinfo\n\
documentation. My preferred charsets are (each user has preferences):\n\
\n\
ascii-bs ASCII (7-bit), using backspace to apply diacritics\n\
ibmpc IBM-PC 8-bit characters, with proper newlines\n\
latex LaTeX coding of foreign and diacriticized characters\n\
latin1 ISO Latin-1 8-bit extension of ASCII\n\
texte Easy French convention for transmitting email messages\n");
printf ("\
\n\
Usage: %s [OPTION]... [BEFORE]:[AFTER] [FILE]...\n", program_name);
printf ("\
\n\
-a, --auto-check study all recoding paths, report, then exit\n\
-c, --colons use colons instead of double quotes for diaeresis\n\
-d, --diacritics limit conversion to diacritics or alike for LaTeX\n");
#if 0
printf ("\
-f, --force force recoding even if it is not reversible\n");
#endif
printf ("\
-g, --graphics approximate ibmpc rulers by ASCII graphics\n\
-h, --header[=NAME] write C code with table NAME on stdout, then exit\n\
-i, --sequence=files use intermediate files for sequencing passes\n");
#ifdef HAVE_POPEN
printf ("\
-o, --sequence=popen use popen machinery for sequencing passes\n");
#else
printf ("\
-o, --sequence=popen same as -i (on this system)\n");
#endif
#ifdef HAVE_PIPE
printf ("\
-p, --sequence=pipe use pipe machinery for sequencing passes\n");
#else
printf ("\
-p, --sequence=pipe same as -o (on this system)\n");
#endif
printf ("\
-s, --strict use strict mappings, even loose characters\n\
-t, --touch touch the recoded files after replacement\n\
-v, --verbose explain sequence of steps and report progress\n\
-x, --ignore=CHARSET ignore CHARSET while choosing a recoding path\n\
\n\
If none of -i, -o and -p are given, presume -p if no FILE, else -i.\n\
Each FILE is recoded over itself, destroying the original. If no\n\
FILE is specified, then act as a filter and recode stdin to stdout.\n");
}
exit (status);
}
/*----------------------------------------------------------------------.
| Main program. Decode ARGC arguments passed through the ARGV array of |
| strings, then launch execution. |
`----------------------------------------------------------------------*/
/* Long options equivalences. */
static const struct option long_options[] =
{
{"auto-check", no_argument, NULL, 'a'},
{"colons", no_argument, NULL, 'c'},
{"copyright", no_argument, NULL, 'C'},
{"diacritics", no_argument, NULL, 'd'},
{"force", no_argument, NULL, 'f'},
{"header", optional_argument, NULL, 'h'},
{"help", no_argument, &show_help, 1},
{"ignore", required_argument, NULL, 'x'},
{"list", optional_argument, NULL, 'l'},
{"sequence", required_argument, NULL, '\n'},
{"strict", no_argument, NULL, 's'},
{"touch", no_argument, NULL, 't'},
{"verbose", no_argument, NULL, 'v'},
{"version", no_argument, &show_version, 1},
{0, 0, 0, 0},
};
static const char *const format_strings[] =
{
"decimal",
"octal",
"hexadecimal",
"full",
NULL,
};
static const char *const sequence_strings[] =
{
"files",
"popen",
"pipe",
NULL,
};
int
main (int argc, char *const *argv)
{
extern int optind; /* index of argument */
int option_char; /* option character */
const char *input_name; /* input file name */
char output_name[200]; /* output file name */
FILE *file; /* file to check or stat */
#ifdef MSDOS
struct ftime stamp_stat; /* input file time stamps */
#else
struct stat stamp_stat; /* input file time stamps */
time_t stamp_utime[2]; /* recoded file time stamps */
#endif
char *cursor; /* all purpose cursor */
/* Decode command options. */
program_name = argv[0];
while (option_char = getopt_long (argc, argv, "aCcdfgh::il::opstvx:",
long_options, NULL),
option_char != EOF)
switch (option_char)
{
default:
usage (EXIT_FAILURE);
case '\0':
break;
case '\n':
switch (argmatch (optarg, sequence_strings))
{
case -2:
error (0, 0, "Ambiguous sequence `%s'", optarg);
usage (EXIT_FAILURE);
case -1:
error (0, 0, "Unknown sequence `%s'", optarg);
usage (EXIT_FAILURE);
case 0:
sequence_strategy = SEQUENCE_WITH_FILES;
break;
case 1:
sequence_strategy = SEQUENCE_WITH_POPEN;
break;
case 2:
sequence_strategy = SEQUENCE_WITH_PIPE;
break;
}
break;
case 'a':
auto_check_mode = 1;
break;
case 'C':
fprintf (stderr, "%s", copyright_string);
exit (EXIT_SUCCESS);
case 'c':
diaeresis_char = ':';
break;
case 'd':
diacritics_only = 1;
break;
case 'f':
force_option = 1;
break;
case 'g':
ascii_graphics = 1;
break;
case 'h':
make_header_mode = 1;
header_name = optarg;
break;
case 'i':
sequence_strategy = SEQUENCE_WITH_FILES;
break;
case 'l':
show_charsets = 1;
if (optarg)
switch (argmatch (optarg, format_strings))
{
case -2:
error (0, 0, "Ambiguous format `%s'", optarg);
usage (EXIT_FAILURE);
case -1:
error (0, 0, "Unknown format `%s'", optarg);
usage (EXIT_FAILURE);
case 0:
list_format = DECIMAL_FORMAT;
break;
case 1:
list_format = OCTAL_FORMAT;
break;
case 2:
list_format = HEXADECIMAL_FORMAT;
break;
case 3:
list_format = FULL_FORMAT;
break;
}
break;
case 'o':
sequence_strategy = SEQUENCE_WITH_POPEN;
break;
case 'p':
sequence_strategy = SEQUENCE_WITH_PIPE;
break;
case 's':
strict_mapping = 1;
break;
case 't':
touch_option = 1;
break;
case 'v':
verbose_option = 1;
break;
case 'x':
ignored_name = optarg;
break;
}
if (strict_mapping)
force_option = 1;
/* Process trivial options. */
if (show_version)
{
printf ("%s\n", version_string);
exit (EXIT_SUCCESS);
}
if (show_help)
usage (EXIT_SUCCESS);
/* Register all modules, then set the ignored charset. */
register_all_modules ();
make_argmatch_array ();
if (ignored_name)
find_charset (clean_charset_name (ignored_name))->ignore = 1;
/* Process charset listing options. */
if (show_charsets)
{
/* Select a possible charset and a default format. */
if (optind + 1 == argc)
list_charset = find_charset (clean_charset_name (argv[optind]));
else if (list_format != NO_FORMAT)
list_charset = find_charset (clean_charset_name (NULL));
else
list_charset = NULL;
/* List the charset(s) appropriately. */
if (list_charset)
if (list_format == FULL_FORMAT)
list_full_charset (list_charset);
else
list_concise_charset (list_charset);
else
list_all_charsets ();
/* Then get out. */
exit (EXIT_SUCCESS);
}
/* Process other options not requiring BEFORE:AFTER. */
if (auto_check_mode)
{
report_about_all_sequences ();
exit (EXIT_SUCCESS);
}
/* Prepare for decoding the BEFORE:AFTER argument. Split it. Then
guarantee a NULL at end of charset_name_array. */
if (optind + 1 > argc)
usage (EXIT_FAILURE);
decode_before_after (argv[optind++]);
/* Establish the sequence of recoding steps. */
length_of_sequence = 0;
find_sequence (before_charset, after_charset);
if (length_of_sequence < 0)
error (EXIT_FAILURE, 0, "No way to recode from %s to %s.",
before_charset->name, after_charset->name);
optimize_sequence ();
/* If we merely want C code, do it and get out. */
if (make_header_mode)
{
if (length_of_sequence == 0)
error (EXIT_FAILURE, 0, "No C code for the trivial recoding");
if (length_of_sequence > 1
|| !(sequence[0]->one_to_one || sequence[0]->one_to_many))
error (EXIT_FAILURE, 0, "Unable to deduce the recoding table");
output_header_file ();
exit (EXIT_SUCCESS);
}
/* If there is no input file, act as a filter. Else, recode all files
over themselves. */
if (optind < argc)
{
/* When reading and writing files, unless the user selected otherwise,
avoid forking and use intermediate files. */
if (sequence_strategy == STRATEGY_UNDECIDED)
sequence_strategy = SEQUENCE_WITH_FILES;
/* In case files are recoded over themselves and there is no
recoding step at all, do not even try to touch the files. */
if (length_of_sequence > 0)
/* Process files, one at a time. */
for (; optind < argc; optind++)
{
input_name = argv[optind];
/* Check if the file can be read and rewritten. */
if (file = fopen (input_name, "r+"), file == NULL)
error (EXIT_FAILURE, errno, input_name);
/* Save the input file time stamp. */
if (!touch_option)
{
#ifdef MSDOS
getftime (fileno (file), &stamp_stat);
#else
fstat (fileno (file), &stamp_stat);
#endif
}
fclose (file);
/* Choose an output file in the same directory. */
strcpy (output_name, input_name);
for (cursor = output_name + strlen (output_name);
cursor > output_name && cursor[-1] != '/'
#ifdef MSDOS
/* It has been reported this next line is also required
for OS/2 EMX. */
&& cursor[-1] != '\\' && cursor[-1] != ':'
#endif
&& cursor[-1] != '\\' && cursor[-1] != ':'
; cursor--)
;
strcpy (cursor, "recodeXX.TMP");
/* Recode the file. */
execute_sequence (input_name, output_name);
/* Move the new file over the original. */
if (unlink (input_name) < 0)
error (EXIT_FAILURE, errno, input_name);
#ifdef HAVE_RENAME
if (rename (output_name, input_name) < 0)
error (EXIT_FAILURE, errno, output_name);
#else
if (link (output_name, input_name) < 0)
error (EXIT_FAILURE, errno, output_name);
if (unlink (output_name) < 0)
error (EXIT_FAILURE, errno, output_name);
#endif
/* Adjust the time stamp for the new file. */
if (!touch_option)
{
#ifdef MSDOS
file = fopen (input_name, "r");
if (file == NULL)
error (EXIT_FAILURE, errno, input_name);
setftime (fileno (file), &stamp_stat);
fclose (file);
#else
stamp_utime[0] = stamp_stat.st_atime;
stamp_utime[1] = stamp_stat.st_mtime;
utime (input_name, stamp_utime);
#endif
}
}
}
else
{
/* When reading stdin and writing stdout, unless the user selected
otherwise, fork processes interconnected with pipes. */
if (sequence_strategy == STRATEGY_UNDECIDED)
sequence_strategy = SEQUENCE_WITH_PIPE;
execute_sequence (NULL, NULL);
}
exit (EXIT_SUCCESS);
}